Początkowe ustawienia

Css

/* Button with the class "my_button". */
.my_button {
  width: 70px;
  padding: 10px;
  border: none;
  cursor: pointer;
  font-size: 14px;
  color: #3c2a34;
  background-color: #caa8b1;
}

/* All heading levels share the same alignment and text colour. */
h1, h2, h3, h4, h5, h6 {
  color: #e0cdd6;
  text-align: center;
}

/* Page background. */
body {
  background-color: #3c2a34;
}

/* Tables: automatic horizontal margins, same text colour as the headings. */
table {
  color: #e0cdd6;
  margin: 1em auto;
}

Definiowanie ścieżek

# Root folder of the project; every other path below is derived from it.
project_path <- "D:/moje/projekty/chess-in-the-digital-age"

# Folder used as the working directory for the presentation.
presentation_path <- file.path(project_path, "presentation")

# Working directory for the current chunk / interactive session.
setwd(presentation_path)

# Folder that receives the downloaded data sets.
Dataset_path <- file.path(project_path, "Dataset")

# Month ("YYYY-MM") passed to the link and game-count selection functions.
selected_date <- "2024-04"

# knitr reads the working directory for chunk evaluation from the package
# option `root.dir` (opts_knit). The previous `opts_chunk$set(root = ...)`
# only stored a chunk option called `root`, which knitr does not use.
knitr::opts_knit$set(root.dir = presentation_path)

Definiowanie paczek

library(pander)
panderOptions('digits',7)
library(knitr)
library(rvest)
library(stringi)
library(dplyr)
library(scales)
library(ggplot2)
library(png)
library("patchwork") 
library(forcats)
# Pictures placed on top of the plots further down (via inset_element()).
# Paths are relative to the working directory.
img  <- readPNG("../pictures/queens_gambit.png", native = TRUE)
img2 <- readPNG("../pictures/candidates_tournament.png", native = TRUE)
img3 <- readPNG("../pictures/covid.png", native = TRUE)
img4 <- readPNG("../pictures/opening_icon.png", native = TRUE)

Wybór tablicy do scrapowania

# Page to scrape and the XPath of the table node inside it.
url <- "https://database.lichess.org"
path <- "/html/body/div/div[2]/div/section[1]/table"

# Table node ("wezel" = node) reused by all scraping steps below.
wezel <- html_node(x = read_html(url), xpath = path)

JavaScript dla przycisku

// Element with id "my_init" and the button (id "button_my_init") toggling it.
const div_my_init = document.getElementById("my_init");

// Hide the element on load. The original line used the undefined name
// `div_init`, which throws a ReferenceError and aborts the rest of the script.
div_my_init.style.display = 'none';

var button_my_init = document.getElementById("button_my_init");

/**
 * Toggle the visibility of the "my_init" element and update the button
 * caption: "Ukryj" while the element is shown, "Pokaż" while it is hidden.
 */
function hideMy_init() {
  if (div_my_init.style.display === 'none') {
    div_my_init.style.display = 'block';
    button_my_init.innerHTML = "Ukryj";
  } else {
    div_my_init.style.display = 'none';
    button_my_init.innerHTML = "Pokaż";
  }
}

Czy szachy są coraz popularniejsze?

Scrapowanie danych z lichess

# Third column of the scraped table: number of games played per month.
games_count_html <- html_table(wezel)[[3]]

# Remove the "," separators, convert to numbers and drop the element at
# position length(games_count_html), i.e. the last one.
my_games_count <- data.frame(
  count = as.numeric(
    stri_replace_all(games_count_html, "", regex = "\\,")
  )[-length(games_count_html)]
)

# All hyperlinks of the table, prefixed with the site address.
hyperlinks <- html_nodes(wezel, "a")
my_href <- html_attr(hyperlinks, "href")
links <- data.frame(links = paste(url, my_href, sep = "/"))

# Every second link, starting with the second one (the .zst.torrent variant).
init_links_for_download <- links[seq(2, nrow(links), by = 2), 1]

# Year of every link: the text between "rated_" and the following "-".
# NOTE(review): stri_match_all() is given a data.frame here; the result is
# presumably one match matrix covering all links - confirm before changing.
my_year_from_imported_links <- data.frame(
  year = data.frame(
    stri_match_all(
      data.frame(init_links_for_download),
      regex = "rated_\\s*(.*?)\\s*[-]"
    )
  )[, 2]
)

Data_games_count <- cbind(my_year_from_imported_links, my_games_count)

# For every distinct year: position of its first occurrence in the reversed
# year column. Used later as axis breaks.
first_index_of_years <- match(
  unique(my_year_from_imported_links)[, ],
  rev(Data_games_count$year)
)

Wizualizacja

# Bar chart of the monthly game counts. The x position runs from
# nrow(my_games_count) down to 1, so the first row is drawn furthest right.
my_plot <- ggplot(
  Data_games_count,
  aes(x = nrow(my_games_count):1, y = count)
) +

  # chart type
  geom_bar(aes(fill = year), stat = "identity") +

  # axis labels and title
  labs(
    x = "Years",
    y = "Number of chess games",
    title = "Number of chess games played on the lichess website"
  ) +

  # legend
  # NOTE(review): the fill values are the year strings themselves; presumably
  # they are resolved as colour indices - confirm.
  scale_fill_manual(name = "Years", values = unique(Data_games_count$year)) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +

  # one x tick per year
  scale_x_continuous(
    breaks = rev(first_index_of_years),
    labels = rev(unique(my_year_from_imported_links)[, ])
  ) +

  # y axis labelled in millions
  scale_y_continuous(labels = unit_format(unit = "M", scale = 1e-6)) +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
my_plot

Szukanie powodów znacznego wzrostu popularności szachów w 2020 roku

# arrow drawn on top of the base plot
my_plot +
  geom_segment(
    aes(x = 70, y = 68000000, xend = 86, yend = 62000000),
    linewidth = 1.2,
    arrow = arrow(length = unit(5, "mm"))
  ) +

  # pictures, positioned in relative plot coordinates
  inset_element(p = img, left = 0.02, bottom = 0.54, right = 0.42, top = 0.9) +
  inset_element(p = img2, left = 0.05, bottom = 0.29, right = 0.5, top = 0.52) +
  inset_element(p = img3, left = 0.44, bottom = 0.69, right = 0.65, top = 0.82)

Wnioski:

  • Szachy zyskują na popularności, ale czy na pewno dzięki internetowi i cyfryzacji?

Jak wpłynął serial “Gambit królowej” na szachy?

Ciekawostka:

  • Gambit królowej w szachach to nazwa debiutu szachowego. Debiut szachowy z kolei to początkowe posunięcia w szachach.

Gambit Królowej

Ile było granych debiutów “Gambit królowej”?

Tworzenie struktury zbioru danych

wyodrębnianie tagów oraz hiperłączy

# All <a> nodes of the scraped table and their "href" attribute.
hyperlinks <- html_nodes(wezel, "a")
my_href <- html_attr(hyperlinks, "href")

# Attributes of every link as a one-row data frame, stacked into one table;
# the first rows are printed.
html_attrs(hyperlinks) %>%
  lapply(function(x) data.frame(as.list(x), stringsAsFactors = FALSE)) %>%
  bind_rows() %>%
  head() %>%
  pander()
href
standard/lichess_db_standard_rated_2024-05.pgn.zst
standard/lichess_db_standard_rated_2024-05.pgn.zst.torrent
standard/lichess_db_standard_rated_2024-04.pgn.zst
standard/lichess_db_standard_rated_2024-04.pgn.zst.torrent
standard/lichess_db_standard_rated_2024-03.pgn.zst
standard/lichess_db_standard_rated_2024-03.pgn.zst.torrent

Wybór wersji pobierania (.zst.torrent) w funkcji ograniczającej scrapowanie względem wybranej daty

# Select the .zst.torrent links from the month `date` to the end of the list.
#
# Reads the global data frame `links`.
#
# date: character, used as a regular expression against the dates extracted
#       from the links (e.g. "2024-04").
# Returns the links from the first matching position to the last link.
# Stops with an error when no link matches `date`.
choose_max_date_to_scrap <- function(date) {

  # every second link is the .zst.torrent one
  init_links_for_download <- links[seq(2, nrow(links), by = 2), 1]

  # date of every link: the text between "rated_" and the following "."
  full_date_from_imported_links <- data.frame(stri_match_all(data.frame(init_links_for_download),
                                     regex = "rated_\\s*(.*?)\\s*[.]"))[, 2]

  date_location <- which(stri_detect(full_date_from_imported_links, regex = date))

  if (length(date_location) == 0) {
    stop("No download link matches the date '", date, "'.", call. = FALSE)
  }

  # Use the first match explicitly; `a:b` with a longer `a` would only warn
  # and silently do the same.
  init_links_for_download[date_location[1]:length(init_links_for_download)]
}

links_for_download <- choose_max_date_to_scrap(selected_date)
pander(data.frame(prepared_links = head(links_for_download)))
prepared_links
https://database.lichess.org/standard/lichess_db_standard_rated_2024-04.pgn.zst.torrent
https://database.lichess.org/standard/lichess_db_standard_rated_2024-03.pgn.zst.torrent
https://database.lichess.org/standard/lichess_db_standard_rated_2024-02.pgn.zst.torrent
https://database.lichess.org/standard/lichess_db_standard_rated_2024-01.pgn.zst.torrent
https://database.lichess.org/standard/lichess_db_standard_rated_2023-12.pgn.zst.torrent
https://database.lichess.org/standard/lichess_db_standard_rated_2023-11.pgn.zst.torrent

Wyodrębnianie daty ze scrapowania

# Date of every selected link: the text between "rated_" and the next ".".
full_date_from_imported_links <- data.frame(
  stri_match_all(
    data.frame(links_for_download),
    regex = "rated_\\s*(.*?)\\s*[.]"
  )
)[, 2]
pander(head(data.frame(full_date_from_imported_links)))
full_date_from_imported_links
2024-04
2024-03
2024-02
2024-01
2023-12
2023-11

Wyodrębnianie roku, miesięcy i nazw miesięcy ze scrapowania

# Year of every selected link: the text between "rated_" and the next "-".
year_from_imported_links <- data.frame(
  stri_match_all(
    data.frame(links_for_download),
    regex = "rated_\\s*(.*?)\\s*[-]"
  )
)[, 2]
pander(head(data.frame(year_from_imported_links)))
year_from_imported_links
2024
2024
2024
2024
2023
2023
# Month of every selected link: the text between "-" and the next ".".
month_from_imported_links <- data.frame(
  stri_match_all(
    data.frame(links_for_download),
    regex = "-\\s*(.*?)\\s*[.]"
  )
)[, 2]
pander(head(data.frame(month_from_imported_links)))
month_from_imported_links
04
03
02
01
12
11
# Abbreviated month name (month.abb) for every month number.
my_month_names <- month.abb[as.integer(month_from_imported_links)]
pander(head(data.frame(my_month_names)))
my_month_names
Apr
Mar
Feb
Jan
Dec
Nov

Tworzenie struktury plików - lata

# One folder per distinct year inside the data set folder.
my_paths_year <- unique(paste0(Dataset_path, "/", year_from_imported_links))
pander(data.frame(my_paths_year))
my_paths_year
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024
D:/moje/projekty/chess-in-the-digital-age/Dataset/2023
D:/moje/projekty/chess-in-the-digital-age/Dataset/2022
D:/moje/projekty/chess-in-the-digital-age/Dataset/2021
D:/moje/projekty/chess-in-the-digital-age/Dataset/2020
D:/moje/projekty/chess-in-the-digital-age/Dataset/2019
D:/moje/projekty/chess-in-the-digital-age/Dataset/2018
D:/moje/projekty/chess-in-the-digital-age/Dataset/2017
D:/moje/projekty/chess-in-the-digital-age/Dataset/2016
D:/moje/projekty/chess-in-the-digital-age/Dataset/2015
D:/moje/projekty/chess-in-the-digital-age/Dataset/2014
D:/moje/projekty/chess-in-the-digital-age/Dataset/2013
# Create every folder listed in the global `my_paths_year`.
#
# Folders that already exist are skipped, so rerunning does not warn.
# Missing parent folders are created as well. An empty `my_paths_year`
# is a no-op. Returns NULL invisibly.
create_folders_year <- function() {
  for (year_dir in my_paths_year) {
    if (!dir.exists(year_dir)) {
      dir.create(year_dir, recursive = TRUE)
    }
  }
}

Tworzenie struktury plików - miesiące

# One folder per month, named "<month number>. <month abbreviation>",
# inside the folder of its year.
my_paths_month <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names
)
pander(head(data.frame(my_paths_month)))
my_paths_month
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/04. Apr
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/03. Mar
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/02. Feb
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/01. Jan
D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/12. Dec
D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/11. Nov
# Create every folder listed in the global `my_paths_month`.
#
# Folders that already exist are skipped, so rerunning does not warn.
# Missing parent folders are created as well. An empty `my_paths_month`
# is a no-op. Returns NULL invisibly.
create_folders_month <- function() {
  for (month_dir in my_paths_month) {
    if (!dir.exists(month_dir)) {
      dir.create(month_dir, recursive = TRUE)
    }
  }
}

Definiowanie ścieżek do których będą pobierane pliki i definiowanie ich docelowych nazw

# Destination file of every download: the month folder plus the torrent
# file name built from the full date.
my_paths <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/lichess_db_standard_rated_", full_date_from_imported_links,
  ".pgn.zst.torrent"
)
pander(head(data.frame(my_paths)))
my_paths
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/04. Apr/lichess_db_standard_rated_2024-04.pgn.zst.torrent
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/03. Mar/lichess_db_standard_rated_2024-03.pgn.zst.torrent
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/02. Feb/lichess_db_standard_rated_2024-02.pgn.zst.torrent
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/01. Jan/lichess_db_standard_rated_2024-01.pgn.zst.torrent
D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/12. Dec/lichess_db_standard_rated_2023-12.pgn.zst.torrent
D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/11. Nov/lichess_db_standard_rated_2023-11.pgn.zst.torrent
# Create the year and month folders, then download every link of the global
# `links_for_download` to the matching entry of the global `my_paths`.
#
# Files are written in binary mode and the loop pauses 0.2 s after each
# download. Returns NULL invisibly.
downloading <- function() {
  create_folders_year()
  create_folders_month()
  for (j in seq_along(my_paths)) {
    download.file(links_for_download[j], my_paths[j], mode = "wb")
    # The pause used to be passed as a positional argument of download.file(),
    # where it was bound to `method` instead of acting as a plain delay.
    Sys.sleep(0.2)
  }
}

Tworzenie struktury plików i pobieranie do niej zbiorów danych

# Create the year/month folders and download one file per selected link.
downloading()

Java-script dla przycisku

// Element with id "tworzenie-struktury"; hidden as soon as the script runs.
const my_div1 = document.getElementById("tworzenie-struktury");
my_div1.style.display = 'none';

// Button (id "my_button") whose caption follows the element's state.
var button = document.getElementById("my_button");

/**
 * Show the element when it is hidden, hide it otherwise, and set the button
 * caption to "Ukryj" (shown) or "Pokaż" (hidden).
 */
function myFunction() {
  const isHidden = my_div1.style.display === 'none';
  my_div1.style.display = isHidden ? 'block' : 'none';
  button.innerHTML = isHidden ? "Ukryj" : "Pokaż";
}

Przygotowywanie zbiorów danych do użycia

Pobieranie informacji o liczbie partii poprzez scrapowanie oraz ograniczenie tego scrapowania względem wybranej daty

# Select the monthly game counts from the month `date` to the end of the list.
#
# Reads the globals `wezel` (scraped table node) and `links`.
#
# date: character, used as a regular expression against the dates extracted
#       from the .zst.torrent links (e.g. "2024-04").
# Returns the entries of the third table column from the first matching
# position up to position length(<.zst.torrent links>).
# Stops with an error when no link matches `date`.
choose_max_date_to_scrap_game_count <- function(date) {
  # third column of the table: number of games per month
  my_games_count <- html_table(wezel)[[3]]

  # every second link is the .zst.torrent one
  init_links_for_download <- links[seq(2, nrow(links), by = 2), 1]

  # date of every link: the text between "rated_" and the following "."
  full_date_from_imported_links <- data.frame(stri_match_all(data.frame(init_links_for_download),
                                     regex = "rated_\\s*(.*?)\\s*[.]"))[, 2]

  # positions whose date matches the requested one
  date_location <- which(stri_detect(full_date_from_imported_links, regex = date))

  if (length(date_location) == 0) {
    stop("No download link matches the date '", date, "'.", call. = FALSE)
  }

  # Use the first match explicitly; `a:b` with a longer `a` would only warn
  # and silently do the same.
  my_games_count[date_location[1]:length(init_links_for_download)]
}

games_count <- choose_max_date_to_scrap_game_count(selected_date)
pander(head(data.frame(games_count)))
games_count
91,383,489
95,810,349
91,628,934
99,001,912
96,909,211
92,389,636

funkcja do ograniczania danych dla konkretnego zbioru względem miesiąca

# Write a reduced copy of one month's .pgn file that ends on a game boundary.
#
# Reads the globals `full_date_from_imported_links`, `games_count`,
# `Dataset_path`, `year_from_imported_links`, `month_from_imported_links`
# and `my_month_names`.
#
# date:      character, used as a regular expression against
#            `full_date_from_imported_links`; must match exactly one month.
# data_size: multiplier applied to (games in that month * 18) to obtain the
#            maximum number of lines read from the original file.
#            NOTE(review): 18 is presumably the assumed number of lines per
#            game - confirm.
#
# Side effects: writes "Data_<date>.pgn" and the marker file
# "ograniczono_dane.txt" into the month folder.
# Returns the last rows (tail()) of the written file, read back from disk.
preparing_month_dataset <- function(date, data_size = 0.001) {

  # position of the requested month
  searching_location <- which(stri_detect(full_date_from_imported_links, regex = date))
  if (length(searching_location) != 1) {
    stop("Expected exactly one month matching '", date, "', found ",
         length(searching_location), ".", call. = FALSE)
  }

  # folder of every month; indexed with searching_location where needed
  month_dirs <- paste(Dataset_path, "/", year_from_imported_links, "/",
                      month_from_imported_links, ". ", my_month_names, sep = "")

  # maximum number of lines to read for the requested month
  decreasing_game_number <- round((as.numeric(stri_replace_all(games_count, "", regex = "\\,")) * 18 * data_size)[searching_location])

  # original file of the requested month
  pgn_file_to_read <- paste(month_dirs, "/lichess_db_standard_rated_",
                            full_date_from_imported_links, ".pgn",
                            sep = "")[searching_location]

  # read at most decreasing_game_number lines
  my_pgn <- read.table(pgn_file_to_read,
                       quote = "", sep = "\n", stringsAsFactors = FALSE,
                       nrows = decreasing_game_number)

  # Work with the number of lines actually read: the file may hold fewer
  # lines than requested, and indexing past the end would yield NA rows.
  lines_read <- nrow(my_pgn)
  window_start <- max(1, lines_read - 22)

  # tag names of the last (up to) 23 lines; other lines stay unchanged
  colnms <- sub("\\[(\\w+).+", "\\1", my_pgn[window_start:lines_read, 1])

  # positions inside that window where a new game starts
  Event_location <- which(stri_detect(colnms, regex = "Event"))
  if (length(Event_location) == 0) {
    stop("No 'Event' line found at the end of the lines read from ",
         pgn_file_to_read, ".", call. = FALSE)
  }

  # Keep everything before the first "Event" line of the window. The first
  # match is taken explicitly; a vector inside `a:b` would only warn.
  last_line_to_keep <- window_start + Event_location[1] - 2
  if (last_line_to_keep < 1) {
    stop("The lines read from ", pgn_file_to_read,
         " do not contain a complete game.", call. = FALSE)
  }
  my_pgn2 <- my_pgn[seq_len(last_line_to_keep), ]

  # write the reduced data set
  pgn_file_to_write <- paste(month_dirs, "/Data_", date, ".pgn",
                             sep = "")[searching_location]

  write.table(my_pgn2, pgn_file_to_write, col.names = FALSE, row.names = FALSE, quote = FALSE)

  # marker file recording that the data was reduced
  file_txt_for_info <- paste(month_dirs, "/ograniczono_dane.txt",
                             sep = "")[searching_location]

  write.table(" ", file_txt_for_info)

  # read the written file back so the caller can inspect how it ends
  testing_last_char <- read.table(pgn_file_to_write,
                                  quote = "", sep = "\n", stringsAsFactors = FALSE)
  return(tail(testing_last_char))
}

funkcja do usuwania oryginalnego zbioru danych względem miesiąca

# Delete the original .pgn.zst and .pgn files of one month and leave the
# marker file "usunieto_oryginal.txt" in the month folder.
#
# date: character, used as a regular expression against the global
#       `full_date_from_imported_links`.
removing_oryginal_dataset <- function(date) {
  # position of the requested month
  searching_location <- which(stri_detect(full_date_from_imported_links, regex = date))

  # folder and file-name stem of every month; indexed only at the very end
  month_dirs <- paste0(Dataset_path, "/", year_from_imported_links, "/",
                       month_from_imported_links, ". ", my_month_names)
  dump_stems <- paste0(month_dirs, "/lichess_db_standard_rated_",
                       full_date_from_imported_links)

  # files to delete
  file_pgn_zst_to_remove <- paste0(dump_stems, ".pgn.zst")[searching_location]
  file_pgn_to_remove <- paste0(dump_stems, ".pgn")[searching_location]

  # marker file recording that the originals were removed
  file_txt_for_info <- paste0(month_dirs, "/usunieto_oryginal.txt")[searching_location]

  file.remove(file_pgn_zst_to_remove)
  file.remove(file_pgn_to_remove)
  write.table(" ", file_txt_for_info)
}

Wywoływanie funkcji przygotowującej zbiór z konkretnego miesiąca do późniejszej analizy

# Write the reduced data set for "2013-01" using the default data_size.
preparing_month_dataset("2013-01")

# Delete the original .pgn.zst and .pgn files of "2013-01".
removing_oryginal_dataset("2013-01")

Java-script dla przycisku

// Element with id "przygotowywanie-zbiorow"; hidden as soon as the script runs.
const my_div2 = document.getElementById("przygotowywanie-zbiorow");
my_div2.style.display = 'none';

// Button (id "my_button2") whose caption follows the element's state.
var button2 = document.getElementById("my_button2");

/**
 * Show the element when it is hidden, hide it otherwise, and set the button
 * caption to "Ukryj kod" (shown) or "Pokaż kod" (hidden).
 */
function myFunction2() {
  const isHidden = my_div2.style.display === 'none';
  my_div2.style.display = isHidden ? 'block' : 'none';
  button2.innerHTML = isHidden ? "Ukryj kod" : "Pokaż kod";
}

Liczenie zagranych debiutów “Gambit królowej” na przestrzeni lat

# command that counts how often each tag name occurs (kept for reference)
# zmienne = fct_count(fct_infreq(sub("\\[(\\w+).+", "\\1", pgn[1:nrow(pgn.df),1])))

# reduced data set of every month
files_to_read <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/Data_", full_date_from_imported_links, ".pgn"
)

# percentage of games per month whose "Opening" tag mentions "Queen's Gambit"
Percent_games_count <- numeric(length(files_to_read))

for (i in seq_along(files_to_read)) {
  pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)

  # tag values: the quoted part of every tag line
  pgn.df <- data.frame(matrix(sub("\\[\\w+ \\\"(.+)\\\"\\]", "\\1", pgn[, 1]),
                              byrow = TRUE, ncol = 1))

  # lines whose tag name is "Opening" (one per game)
  opening_index <- which(sub("\\[(\\w+).+", "\\1", pgn[, 1]) == "Opening")

  All_games_count <- length(opening_index)

  Queens_Gambit_games_count <- sum(
    stri_detect(pgn.df[opening_index, ], regex = "Queen's Gambit"),
    na.rm = TRUE
  )

  Percent_games_count[i] <- (Queens_Gambit_games_count / All_games_count) * 100
}

Przewidywania:

  • Skoro termin “Gambit królowej” zyskał rozgłos dzięki serialowi, to debiut o tej nazwie powinien występować częściej po 2020 roku procentowo w stosunku do całości.

Wizualizacja

# Percentages next to the year of the month they belong to.
my_Percent_games_count <- data.frame(count = Percent_games_count)

Data_Queens_gambit_games_count <- cbind(
  data.frame(year = year_from_imported_links),
  my_Percent_games_count
)

# x runs from length(games_count) down to 1: first row is drawn furthest right
ggplot(
  Data_Queens_gambit_games_count,
  aes(x = length(games_count):1, y = count)
) +

  # chart type
  geom_bar(aes(fill = year), stat = "identity") +

  # axis labels and title
  labs(
    x = "Years",
    y = "Number of chess games ( % ) ",
    title = "Number of 'Queen's Gambit' openings played"
  ) +

  # legend
  scale_fill_manual(
    name = "Years",
    values = unique(Data_Queens_gambit_games_count$year)
  ) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +

  # one x tick per year
  scale_x_continuous(
    breaks = rev(first_index_of_years),
    labels = rev(unique(year_from_imported_links))
  ) +
  scale_y_continuous(labels = unit_format(unit = "%", scale = 1)) +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

Obserwacje:

  • Widać wyraźnie malejący stosunek granych debiutów “Gambit królowej”, po dacie premiery serialu.

Wnioski:

  • Serial znacząco wpłynął na osoby, które nie wiedzą, co znaczy termin “Gambit królowej”.

Czy serial wpłynął tylko na nowicjuszy szachowych?

Wyszukiwanie liczby partii zagranych przez nowicjuszy (ranga < 1200)

# reduced data set of every month
files_to_read <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/Data_", full_date_from_imported_links, ".pgn"
)

# percentage of games per month whose "WhiteElo" value is below 1200
novice_games_count <- numeric(length(files_to_read))

for (i in seq_along(files_to_read)) {
  pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)

  # tag values: the quoted part of every tag line
  pgn.df <- data.frame(matrix(sub("\\[\\w+ \\\"(.+)\\\"\\]", "\\1", pgn[, 1]),
                              byrow = TRUE, ncol = 1))

  # lines whose tag name is "WhiteElo" (one per game)
  elo_index <- which(sub("\\[(\\w+).+", "\\1", pgn[, 1]) == "WhiteElo")

  All_games_count <- length(elo_index)

  # values that cannot be converted to a number are not counted
  my_novice_games_count <- sum(as.numeric(pgn.df[elo_index, ]) < 1200, na.rm = TRUE)

  novice_games_count[i] <- (my_novice_games_count / All_games_count) * 100
}

Wizualizacja

# Percentages of novice games next to the year of the month they belong to.
# The values come from `novice_games_count`, the vector filled by the loop
# over the monthly files; `Percent_novice_games_count` was never defined.
my_Percent_novice_games_count <- data.frame(count = novice_games_count)

Data_novice_games_count <- cbind(data.frame(year = year_from_imported_links), my_Percent_novice_games_count)

# x runs from length(games_count) down to 1: first row is drawn furthest right
ggplot(Data_novice_games_count, aes(x = rev(1:length(games_count)), y = count)) +

  # chart type
  geom_bar(stat = "identity", aes(fill = year)) +

  # axis labels and title
  labs(x = "Years", y = "Number of chess games ( % ) ", title = "Number of novice games (rank < 1200) ") +

  # legend
  scale_fill_manual(name = "Years", values = unique(Data_novice_games_count$year)) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +

  # one x tick per year
  scale_x_continuous(breaks = c(rev(first_index_of_years)),
                     labels = c(rev(unique(year_from_imported_links)))) +
  scale_y_continuous(labels = unit_format(unit = "%", scale = 1)) +
  theme(text = element_text(size = 20),
        axis.text.x = element_text(angle = 90, hjust = 1)) +

  # arrow
  geom_segment(aes(58, 10.68, xend = 95, yend = 10),
               linewidth = 1.2,
               arrow = arrow(length = unit(5, "mm"))) +

  # picture
  inset_element(p = img,
                left = 0.02,
                bottom = 0.54,
                right = 0.42,
                top = 0.9)

Obserwacje:

  • Widać znaczny napływ nowicjuszy po dacie premiery serialu

Wyszukiwanie liczby partii zagranych przez zaawansowanych graczy (ranga > 1800)

# reduced data set of every month
files_to_read <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/Data_", full_date_from_imported_links, ".pgn"
)

# percentage of games per month whose "WhiteElo" value is above 1800
pro_games_count <- numeric(length(files_to_read))

for (i in seq_along(files_to_read)) {
  pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)

  # tag values: the quoted part of every tag line
  pgn.df <- data.frame(matrix(sub("\\[\\w+ \\\"(.+)\\\"\\]", "\\1", pgn[, 1]),
                              byrow = TRUE, ncol = 1))

  # lines whose tag name is "WhiteElo" (one per game)
  elo_index_v2 <- which(sub("\\[(\\w+).+", "\\1", pgn[, 1]) == "WhiteElo")

  All_games_count_v2 <- length(elo_index_v2)

  # values that cannot be converted to a number are not counted
  my_pro_games_count <- sum(as.numeric(pgn.df[elo_index_v2, ]) > 1800, na.rm = TRUE)

  pro_games_count[i] <- (my_pro_games_count / All_games_count_v2) * 100
}

Wizualizacja

# Percentages of advanced-player games next to the year of their month.
# The values come from `pro_games_count`, the vector filled by the loop over
# the monthly files; `Percent_pro_games_count` was never defined.
my_Percent_pro_games_count <- data.frame(count = pro_games_count)

Data_pro_games_count <- cbind(data.frame(year = year_from_imported_links), my_Percent_pro_games_count)

# x runs from length(games_count) down to 1: first row is drawn furthest right
ggplot(Data_pro_games_count, aes(x = rev(1:length(games_count)), y = count)) +

  # chart type
  geom_bar(stat = "identity", aes(fill = year)) +

  # axis labels and title
  labs(x = "Years", y = "Number of chess games ( % ) ", title = "Number of pro games (rank > 1800) ") +

  # legend
  scale_fill_manual(name = "Years", values = unique(Data_pro_games_count$year)) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +

  # one x tick per year
  scale_x_continuous(breaks = c(rev(first_index_of_years)),
                     labels = c(rev(unique(year_from_imported_links)))) +
  scale_y_continuous(labels = unit_format(unit = "%", scale = 1)) +
  theme(text = element_text(size = 20),
        axis.text.x = element_text(angle = 90, hjust = 1)) +

  # arrow
  geom_segment(aes(65, 38, xend = 86, yend = 37),
               linewidth = 1.2,
               arrow = arrow(length = unit(5, "mm"))) +

  # picture
  inset_element(p = img3,
                left = 0.39,
                bottom = 0.84,
                right = 0.6,
                top = 0.97)

Obserwacje:

  • Widać spadek ilości zaawansowanych graczy po 2020 roku w stosunku do całości

Wnioski:

  • Serial wpłynął na popularyzację szachów, głównie zachęcając do gry nowicjuszy.

Dlaczego serial nie wpłynął na zaawansowanych graczy?

Czy dodanie do szachów nowości zwiększy popularność wśród zaawansowanych graczy?

Tworzenie nowego debiutu

Statystyki popularności Anny Cramling na twitch

Wyszukiwanie debiutów po sekwencji posunięć, zamiast po nazwie

#files_to_read = paste(Dataset_path,"/",year_from_imported_links,"/",month_from_imported_links,". ",my_month_names,"/Data_",full_date_from_imported_links,".pgn", sep="")

# Percentage of games per monthly file that start with a given sequence of
# first-listed moves.
#
# Reads the global `files_to_read`.
#
# searching_moves: character vector; element j is compared with the first
#                  word that follows "<j>. " on a move line.
# Returns one percentage per file: matching lines / candidate lines * 100.
search_by_moves <- function(searching_moves) {
  opening_count <- c()

  for (i in seq_along(files_to_read)) {
    pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)

    # Lines that stay non-empty after the substitution are the candidates.
    # NOTE(review): presumably tag lines collapse to "" here so that only
    # move lines remain - confirm before touching this pattern.
    moves <- which(sub("\\[(\\b+).+", "\\1", pgn[1:nrow(pgn), 1]) != "")

    All_games_count <- length(pgn[moves, ])

    # narrow the candidates down move by move
    for (j in seq_along(searching_moves)) {
      my_regex <- paste("(.*?)\\s*", j, ". (\\w+).+", sep = "")
      moves_to_compare <- sub(my_regex, "\\2", pgn[moves, ])
      moves <- moves[which(moves_to_compare == searching_moves[j])]
    }

    opening_count <- append(opening_count, (length(moves) / All_games_count) * 100)
  }
  return(opening_count)
}

# Keep the result: the plot below reads `opening_count`, which only existed
# inside the function before. The second line prints it as the bare call did.
opening_count <- search_by_moves(c("e3", "d3", "Ne2", "Nd2", "Ng3"))
opening_count

Wizualizacja

# Percentages next to the year of the month they belong to.
my_opening_count <- data.frame(count = opening_count)

Data_opening_count <- cbind(
  data.frame(year = year_from_imported_links),
  my_opening_count
)

# x runs from length(games_count) down to 1: first row is drawn furthest right
ggplot(
  Data_opening_count,
  aes(x = length(games_count):1, y = count)
) +

  # chart type
  geom_bar(aes(fill = year), stat = "identity") +

  # axis labels and title
  labs(
    x = "Years",
    y = "Number of chess games",
    title = "Number of 'Cow' move sequences played"
  ) +

  # legend
  scale_fill_manual(name = "Years", values = unique(Data_opening_count$year)) +
  expand_limits(x = c(0, NA), y = c(0, 10)) +

  # one x tick per year
  scale_x_continuous(
    breaks = rev(first_index_of_years),
    labels = rev(unique(year_from_imported_links))
  ) +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +

  # arrow
  geom_segment(
    aes(x = 80, y = 3.2, xend = 123, yend = 2.2),
    linewidth = 1.2,
    arrow = arrow(length = unit(5, "mm"))
  ) +

  # picture
  inset_element(p = img4, left = 0.42, bottom = 0.34, right = 0.75, top = 0.51)

Obserwacje:

  • Widać wzrost popularności streamerki po opublikowaniu nowego debiutu oraz widać wzrost zainteresowania jej stworzoną sekwencją szachową

Wnioski:

  • Ciężko wpłynąć na zaawansowanych graczy. Rolą internetu jest to zmienić.

Podsumowanie